################################################################################################
########  Volha Charnysh. 2024. Uprooted: How post-WWII Population Transfers Remade Europe. CUP
########  Replication for quantitative analysis in Chapter 3
########  R version 3.6.3 (2020-02-29) -- "Holding the Windsock"
########  Platform: x86_64-apple-darwin15.6.0 (64-bit)
####################################################################################

rm(list = ls())
library(stargazer)
library(coefplot)
library(reldist) #for gini
library(openxlsx)
library(interplot)
library(broom)
library(dotwhisker)
library(sjPlot)
library(sjlabelled)
library(sjmisc)

# Read in dataset at the village level:
gocosp<-read.xlsx("chapter3.xlsx")

#computing key variables :
gocosp$migrsum<-gocosp$fromPrussia +gocosp$fromKresAustr+gocosp$fromKresyRus+gocosp$partRus +gocosp$fromWestEur+ gocosp$fromAustr +gocosp$fromUSSR +gocosp$Other
gocosp$totPop<-round(gocosp$migrsum/gocosp$shareNaplyw)
gocosp$MigDivV<-1-(((gocosp$fromKresAustr+gocosp$fromKresyRus+gocosp$fromUSSR)/gocosp$migrsum)^2+((gocosp$fromPrussia+gocosp$partRus+gocosp$fromAustr)/gocosp$migrsum)^2+((gocosp$fromWestEur+gocosp$Other)/gocosp$migrsum)^2)

gocosp$shareUSSR<-(gocosp$fromKresAustr+gocosp$fromKresyRus+gocosp$fromUSSR)/gocosp$totPop
gocosp$shareCP<-(gocosp$fromPrussia+gocosp$partRus+gocosp$fromAustr)/gocosp$totPop
gocosp$shareOther<-(gocosp$fromWestEur+gocosp$Other)/gocosp$totPop
#gocosp$ShareAut is share autochthonous

gocosp$frac4<-1-(gocosp$shareUSSR^2+gocosp$shareCP^2+gocosp$shareOther^2+gocosp$ShareAut^2)
gocosp$frac4[gocosp$frac4<0]<-0 ##Slightly below zero, fix this.

## alternative village type indicator, using different thredholds ##
gocosp$type<-rep(NA, nrow(gocosp))
gocosp$type[gocosp$Resettled.as.whole==1]<-"resettled as a whole"
gocosp$type[gocosp$ShareAut>0.80]<-"indigenous"
gocosp$type[gocosp$Resettled.as.whole==0 & gocosp$ShareAut<0.80 & gocosp$MigDivV>0.2386]<-"heterogeneous"
gocosp$type[gocosp$Resettled.as.whole==0 & gocosp$ShareAut<0.80 & gocosp$MigDivV<0.2386]<-"homogeneous"

gocosp$type<-relevel(factor(gocosp$type), ref = "indigenous")

## alternative levels ##
gocosp$type2<-rep(NA, nrow(gocosp))
gocosp$type2[gocosp$Resettled.as.whole==1]<-"resettled as a whole"
gocosp$type2[gocosp$ShareAut>0.70]<-"indigenous"
gocosp$type2[gocosp$Resettled.as.whole==0 & gocosp$ShareAut<0.70 & gocosp$MigDivV>=0.4]<-"very heterogeneous"
gocosp$type2[gocosp$Resettled.as.whole==0 & gocosp$ShareAut<0.70 & gocosp$MigDivV>0.05 & gocosp$MigDivV<0.4]<-"moderately heterogeneous"
gocosp$type2[gocosp$Resettled.as.whole==0 & gocosp$ShareAut<0.70 & gocosp$MigDivV<=0.05]<-"homogeneous"

gocosp$type2<-relevel(factor(gocosp$type2), ref = "indigenous")


# Variables from 1939 German census:
gocosp$ShareFarmsOver20ha<-(gocosp$Gosp20to100+gocosp$GospOver100)/gocosp$GospodRolne

gocosp$ShareAgric<-gocosp$LudnoscRoln/gocosp$LudnoscOgolem

summary(gocosp$shareNaplyw)

##MAIN ANALYSIS:
#First we look at the resettled villageS: 
log1<-glm(OSP ~ shareNaplyw+ShareAgric+ ShareFarmsOver20ha +log(LudnoscOgolem), data=gocosp, family="binomial")

log2<-glm(OSP ~ frac4+ShareAgric+ ShareFarmsOver20ha +log(LudnoscOgolem), data=gocosp, family="binomial")

log3<-glm(OSP ~ type+ShareAgric+ ShareFarmsOver20ha +log(LudnoscOgolem), data=gocosp, family="binomial")

log4<-glm(OSP ~ type2+ShareAgric+ ShareFarmsOver20ha +log(LudnoscOgolem), data=gocosp, family="binomial")


##TABLE IN THE CHAPTER APPENDIX ##
stargazer(log1, log2, log3, log4, digits=2, omit.stat=c("ser","f", "rsq"), no.space=TRUE, covariate.labels=c("Share migrants", "Fractionalization index", "Type: Heterogeneous", "Type: Homogeneous", "Type: Resettled as a whole", "Type: Homogeneous", "Type: Moderately heterogeneous", "Type: Resettled as a whole", "Type: Very heterogeneous", "Share in agriculture", "Share farms over 20 ha", "Ln population"))

##Plotting the results: 

##Figure 3.1: Predicted probability of the presence of a volunteer fire brigade relative to the share of migrants (left graph) and the level of cultural diversity (right graph).
graph_sharemig <- plot_model(log1, type = "pred", terms = "shareNaplyw [all]", title = "",
                             axis.title = c("Share migrants", "Predicted probability of a volunteer fire brigade in a village")) +
  theme_bw() +
  theme(axis.text = element_text(size = 12),  # Increase size of axis labels
        axis.title = element_text(size = 14))  # Increase size of axis titles

ggsave("figure3.1a", plot=graph_sharemig, device="jpg", width=8, height=8, units="in", dpi=300)

#Results Plot for Fractionalization Index
graph_frac4 <- plot_model(log2, type = "pred", terms = "frac4 [all]", title = "",
                          axis.title = c("Fractionalization index", "Predicted probability of a volunteer fire brigade in a village")) +
               theme_bw() +
               theme(axis.text = element_text(size = 12),  # Increase size of axis labels
               axis.title = element_text(size = 14))  # Increase size of axis titles


ggsave("figure3.1b", plot=graph_frac4, device="jpg", width=8, height=8, units="in", dpi=300)


#Figure 3.2
figure3.2 <- plot_model(log3, type = "est", transform = NULL, vline.color = "grey", 
                         axis.labels = c("Type: Resettled together", "Type: Homogeneous", "Type: Heterogeneous"),
                         title="",
                         axis.title = "Log-odds of a volunteer fire brigade", 
                         rm.terms = c("ShareAgric", "ShareFarmsOver20ha", "log(LudnoscOgolem)"),
                         show.values = TRUE, se = TRUE)  + theme_sjplot2() +
  theme(axis.text.x = element_text(size = 12),  # Increase x-axis label font size
        axis.text.y = element_text(size = 12),  # Increase y-axis label font size
        axis.title = element_text(size = 14))   # Increase axis title font size

ggsave("fig3.2", plot=figure3.2, device="jpg", width=8, height=8, units="in", dpi=300)

#Note: type = "est" indicates coefficient plot. transform = NULL means leave raw coefficients instead of odds ratios. se = TRUE means display std. errors (if excluded, will show different error bars).


